#> Info: preparing to read 1 data file(s)...
#> Info: reading file 1/1 'GB_SH1_lowres.cf.rda' with '.rda' reader...
#> Info: loaded data for 59 data files from R Data Archive - checking loaded files for content consistency...
#> Info: encountered 4 problems in total.
#> # A tibble: 4 x 4
#> file_id type func details
#> <chr> <chr> <chr> <chr>
#> 1 504__A5__1_5UL_30ng.dxf error extract_dxf_r… cannot identify measured m…
#> 2 504__A5__1_5UL_30ng.dxf error extract_isoda… object 'mass' not found
#> 3 503____.dxf error extract_dxf_r… cannot identify measured m…
#> 4 503____.dxf error extract_isoda… object 'mass' not found
#> Info: aggregating vendor data table without units from 59 data file(s), including file info 'c(ox = `Seed Oxidation`, everything())'
# add metadata
data_w_metadata <- vdt %>%
iso_add_metadata(metadata_samples, match_by = c(`file_id`))
#> Info: metadata added to 2007 data rows, 0 left without metadata:
#> - 57 metadata entries were mapped to 2007 data entries via column 'file_id'
# missing metadata
data_w_metadata %>%
iso_get_missing_metadata(select = c(Analysis, `file_id`, map_id))
#> Info: fetching data entries that are missing metadata
# map peaks
data_w_peaks_all <- data_w_metadata %>%
# only continue with records that have metadata and are flagged for processing
iso_remove_missing_metadata() %>%
filter(process == "yes") %>%
# map peaks
iso_map_peaks(metadata_peak_maps, file_id = file_id, rt = Rt, rt_start = Start, rt_end = End)
#> Info: removing 0 of 2007 data entries because of missing metadata
#>
# problematic peaks
prob <- data_w_peaks_all %>%
iso_get_problematic_peaks(select = c(file_id, Analysis, compound, Start, Rt, End))
#> Info: fetching 1015 of 2225 data table entries with problematic peak identifications (unidentified, missing or ambiguous)
# focus on non problematic peaks only
data_w_peaks <- data_w_peaks_all %>% iso_remove_problematic_peaks()
#> Info: removing 1015 of 2225 data table entries because of problematic peak identifications (unidentified, missing or ambiguous)
data_w_peaks
data_w_peaks %>%
iso_plot_ref_peaks(
is_ref_condition = is_ref_peak == "yes",
ratio = c(`R 13C/12C`, `R 18O/16O`),
group_id = `Analysis`,
within_group = TRUE
)
Focus on the analytes and calculate a few summary parameters we want to use later.
Add known isotope values for standards.
#> Info: matching standards by 'type' and 'compound' - added 15 standard entries to 285 out of 697 rows
Determine calibrations fits for individual standard analyses.
#> Info: preparing data for calibration by grouping based on 'Analysis'
#> Info: generating 'd13C' calibration based on 1 model ('lm(`d 13C/12C` ~ true_d13C)') for 19 data group(s) with standards filter 'is_standard'. Storing residuals in new column 'd13C_resid'.
#> Info: there were no problematic calibrations
#> Warning in `[<-.data.frame`(`*tmp*`, is_list, value = structure(list(`2` =
#> "<>", : replacement element 1 has 1 row to replace 0 rows
#> Warning in `[<-.data.frame`(`*tmp*`, is_list, value = structure(list(`2` =
#> "<>", : replacement element 2 has 1 row to replace 0 rows
Look at calibration parameters (coefficients and summary) as well as data ranges in overview table.
Visualize the calibration parameters
# unnest some of the data for plotting
data_for_plots <-
stds_w_calibs %>%
iso_unnest_data(
c(Oxidation = `Seed Oxidation`,
datetime = file_datetime,
`Mean Amplitude` = ampl_sample_mean.mV)
)
data_for_plots
# parameters vs time
data_for_plots %>%
iso_plot_calibration_parameters(
calibration = "d13C",
x = datetime,
color = `Mean Amplitude`,
shape = Oxidation
)
# parameter vs analysis
data_for_plots %>%
iso_plot_calibration_parameters(
calibration = "d13C",
x = paste(Analysis),
color = `Mean Amplitude`,
shape = Oxidation
)
# parameters vs amplitude
data_for_plots %>%
iso_plot_calibration_parameters(
calibration = "d13C",
x = `Mean Amplitude`
)
# turn the last plot into an interactive one
ggplotly(ggplot2::last_plot() + theme(legend.position = "none"))
Look at residuals to see goodness of fit for the single analysis calibrations.
stds_w_calibs %>%
# pull out all data
iso_unnest_data(select = everything()) %>%
filter(is_standard) %>%
# calculate deviation from means
group_by(file_id) %>%
mutate(
`Var: residual d13C [permil]` = d13C_resid,
`Var: area diff from mean [%]` = (`Intensity 44`/mean(`Intensity 44`) - 1) * 100,
`Var: amplitude diff from mean [%]` = (`Ampl 44`/mean(`Ampl 44`) - 1) * 100
) %>%
ungroup() %>%
# visualize
iso_plot_data(
x = compound,
y = starts_with("Var"),
group = file_id
) +
# plot modifications
labs(x = "")
ggplotly()
Run global calibrations across all standards.
#> Info: preparing data for calibration by nesting the entire dataset
#> Info: generating 'd13C' calibration based on 4 models ('delta_only = lm(`d 13C/12C` ~ true_d13C)', 'delta_and_ampl = lm(`d 13C/12C` ~ true_d13C + `Ampl 44`)', 'delta_cross_ampl = lm(`d 13C/12C` ~ true_d13C * `Ampl 44`)', 'delta_and_time = lm(`d 13C/12C` ~ true_d13C + file_datetime)') for 1 data group(s) with standards filter 'type == "standard" & is_standard'. Storing residuals in new column 'd13C_resid'.
Visualize the calibration parameters for the different models.
# coefficient summary table
data_w_calibs %>% iso_unnest_calibration_parameters("d13C")
# plot to look at coefficients and summary
data_w_calibs %>%
iso_plot_calibration_parameters(
calibration = "d13C",
x = d13C_calib,
color = d13C_calib,
# include the significance level to gauge which model parameters matter
shape = signif
)
ggplotly()
It looks like the more complex calibration models don’t really add any value here. Check out the residuals to double check. It’s clear from the residuals that the models all perform similarly well across all the standards. Because the simplest model that makes physical sense and captures the variation typically is the best to use, we’re going with the delta_only model in this case. #Garrett thinks delta only looks best here
data_w_calibs %>%
iso_unnest_data(select = everything()) %>%
rename(`Injection Volume` = `Identifier 2`) %>%
filter(type == "standard" & is_standard) %>%
# plot each standard analysis for each of the models to see the differences
iso_plot_data(
x = compound,
y = d13C_resid,
group = paste(Analysis, d13C_calib),
color = d13C_calib
)
Apply the delta_only calibration.
#> Info: applying 'd13C' calibration to infer 'true_d13C' for 1 data group(s); storing resulting value in new column 'true_d13C_pred' and estimated error in new column 'true_d13C_pred_se'. This may take a moment... finished.
The comparison of the predicted value in the standards along with the standard error in the prediction (the inherent error of the global calibration itself, which should be treated as the analytical error) can give a better sense of how precise the data is.
Many different ways exist to visualize the data. Regardless of the plotting method it is helpful to keep in mind that the calibration provides not just the predicted value of a peak but also the standard error based on the calibration and an indicator whether a predicted data point was in the calibration range or not.
data_calibrated %>%
iso_plot_data(
x = Analysis,
y = true_d13C_pred,
y_error = true_d13C_pred_se,
color = true_d13C_pred_in_range,
shape = type,
points = TRUE,
lines = FALSE
) +
# additional features beyond the default plot
facet_wrap(~compound, scales = "free_y") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
labs(color = "in calib. range")
ggplotly()
data_calibrated %>%
filter(compound %in% c("pristane", "phytane", "Sterane", "C15", "C19", "C21", "C23", "C27", "C29", "C31", "C33"))%>%
#, true_d13C_pred_in_range == TRUE
iso_plot_data(
x = Depth,
y = true_d13C_pred,
y_error = true_d13C_pred_se,
color = true_d13C_pred_in_range
) +
# additional features beyond the default plot
facet_wrap(~compound, scales = "free_y") +
geom_point() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
labs(color = "in calib. range") +
scale_x_reverse() +
coord_flip()
#> Warning: Removed 95 rows containing missing values (geom_errorbar).
#> Warning: Removed 95 rows containing missing values (geom_point).
final_lowres <- data_calibrated %>% filter(Analysis != 493) %>% filter(Analysis != 495) %>%
select(
# sample info
`file_id`, `compound`, `Identifier 1`, depth = `Depth`, `measurement_info`, `ampl_sample_mean.mV`, `file_datetime`, `type` ,
# peak info
`Ampl 44`, true_d13C, true_d13C_pred , true_d13C_pred_se, true_d13C_pred_in_range, d13C_resid, true_d13C_pred_in_range
) %>% apply(.,2,as.character)
write.csv(final_lowres, file = "lowres_d13c.csv")